Arrange Data
# Flag each row of iris that exactly repeats an earlier row (TRUE = duplicate).
duplicated(iris)
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[11] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[21] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[81] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[101] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[131] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[141] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Report the storage class of the Species column.
class(iris[["Species"]])
[1] "factor"
# Show the first five Species values together with the factor levels.
head(iris$Species, n = 5)
[1] setosa setosa setosa setosa setosa
Levels: setosa versicolor virginica
Arrange Table
library(ggplot2)
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(dplyr)
# Preview the first ten rows, then tally the observations per species.
iris %>% head(10)
iris %>% count(Species)
Mean & Standard Deviation
library(DT)
# Per-species mean and standard deviation of each of the four measurements.
new_iris <- iris %>%
  group_by(Species) %>%
  summarise(
    Avg_Sepal_Length = mean(Sepal.Length),
    SD_Sepal_Length = sd(Sepal.Length),
    Avg_Sepal_Width = mean(Sepal.Width),
    SD_Sepal_Width = sd(Sepal.Width),
    Avg_Petal_Length = mean(Petal.Length),
    SD_Petal_Length = sd(Petal.Length),
    Avg_Petal_Width = mean(Petal.Width),
    SD_Petal_Width = sd(Petal.Width)
  )

# Round every statistic column (everything except Species) to two decimals.
# Selecting the columns by name covers the Petal statistics as well; the
# previous index range 2:5 stopped after the Sepal columns.
stat_columns <- setdiff(names(new_iris), "Species")
datatable(new_iris, caption = "Mean & Standard Deviation") %>%
  formatRound(columns = stat_columns, digits = 2)
Scatter Plot
# Petal length against sepal length; colour and point shape both follow
# Species, and a linear fit is overlaid on the points.
p1 <- ggplot(
  iris,
  aes(x = Sepal.Length, y = Petal.Length, color = Species, shape = Species)
) +
  geom_point(size = 3) +
  geom_smooth(method = "lm") +
  ggtitle("Petal Length vs Sepal Length")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p1)
`geom_smooth()` using formula 'y ~ x'
Generalized Pair Plot
library(GGally)
# Pairwise plot matrix of the four numeric columns, coloured by species.
p2 <- ggpairs(
  iris,
  mapping = aes(color = Species),
  columns = 1:4
) +
  ggtitle("Iris Dataset --- 3 Species")
p2
plot: [1,1] [=>------------------------------------] 6% est: 0s
plot: [1,2] [====>---------------------------------] 12% est: 1s
plot: [1,3] [======>-------------------------------] 19% est: 1s
plot: [1,4] [=========>----------------------------] 25% est: 1s
plot: [2,1] [===========>--------------------------] 31% est: 1s
plot: [2,2] [=============>------------------------] 38% est: 1s
plot: [2,3] [================>---------------------] 44% est: 1s
plot: [2,4] [==================>-------------------] 50% est: 1s
plot: [3,1] [====================>-----------------] 56% est: 1s
plot: [3,2] [=======================>--------------] 62% est: 1s
plot: [3,3] [=========================>------------] 69% est: 1s
plot: [3,4] [===========================>----------] 75% est: 0s
plot: [4,1] [==============================>-------] 81% est: 0s
plot: [4,2] [================================>-----] 88% est: 0s
plot: [4,3] [===================================>--] 94% est: 0s
plot: [4,4] [======================================]100% est: 0s
# Base-graphics scatterplot matrix of the four measurements (upper triangle
# only), with point fill chosen by species. pairs() draws as a side effect and
# returns NULL, so it is called directly instead of being stored and printed.
pairs(
  iris[1:4],
  lower.panel = NULL,
  main = "Anderson's Iris Data -- 3 species",
  pch = 21,
  bg = c("red", "green3", "blue")[unclass(iris$Species)]
)
Parallel Coordinate Plot
# Parallel coordinate plot over the four numeric columns, one line per flower,
# grouped by species.
p4 <- ggparcoord(
  iris,
  columns = 1:4,
  groupColumn = "Species"
)

# Render as an interactive plotly widget.
ggplotly(p4)
Histogram
# Histogram of sepal length, bars filled by species, with a dashed grey
# reference line at the overall mean sepal length.
p6 <- ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(binwidth = 0.2, color = "black", aes(fill = Species)) +
  xlab("Sepal Length") +
  ylab("Frequency") +
  ggtitle("Histogram of Sepal Length") +
  # The mean is passed as a constant so exactly one line is drawn; mapping it
  # inside aes() recycled the value over every row of the data.
  geom_vline(
    xintercept = mean(iris$Sepal.Length),
    linetype = "dashed",
    color = "grey"
  )

# Render as an interactive plotly widget.
ggplotly(p6)
Density Plot
# Per-species density of sepal width (semi-transparent fills), with a dashed
# grey reference line at the overall mean sepal width.
p7 <- ggplot(iris, aes(x = Sepal.Width, colour = Species, fill = Species)) +
  geom_density(alpha = .5) +
  # The mean is passed as a constant so exactly one line is drawn and the
  # layer does not pick up the Species colour/fill mappings.
  geom_vline(
    xintercept = mean(iris$Sepal.Width),
    linetype = "dashed",
    color = "grey",
    size = 1
  ) +
  xlab("Sepal Width") +
  ylab("Density")

# Render as an interactive plotly widget.
ggplotly(p7)
Box Plot
# Box plot of petal length for each species, y-axis ticks every 0.5 units.
p8 <- ggplot(iris, aes(x = Species, y = Petal.Length, fill = Species)) +
  geom_boxplot() +
  scale_y_continuous(name = "Petal Length", breaks = seq(0, 10, by = 0.5)) +
  labs(title = "Iris Petal Length Box Plot", x = "Species")

# Render as an interactive plotly widget.
ggplotly(p8)
Violin Plot
# Violin plot of petal width for each species; outline and fill both follow
# Species, y-axis ticks every 0.5 units.
p9 <- ggplot(iris, aes(x = Species, y = Petal.Width, fill = Species)) +
  geom_violin(aes(color = Species), alpha = 0.75) +
  scale_y_continuous(name = "Petal Width", breaks = seq(0, 10, by = 0.5)) +
  labs(title = "Iris Petal Width Violin Plot", x = "Species")

# Render as an interactive plotly widget.
ggplotly(p9)